#############################
# Data processing functions
#############################

# Data pre-processing
preprocez <- function(X,ranks="ALL"){
    # Keep only active combat divisions
    X_ <- X[STATUS%in%"ACTIVE"&N_SOLDIERS>0,]
    # New ID
    X_[,DIV_ARMY := UNIT_UA]
    X_[,UNIT_UA := paste0(UNIT_NAME," ",ARMY) %>% stringr::str_squish()]
    # Define DV names and labels
    yvarz <<- c("KIA_100","WIA_100","MIA_100","POW_100","DES_100","PUN_100","WMED_100")
    ylabz <<- c("Killed in Action","Wounded in Action","Missing in Action","Prisoner of War","Desertion", "Punishment","Medals")
    ylabz_short <<- c("\\textbf{KIA}","\\textbf{WIA}","\\textbf{MIA}","\\textbf{POW}","\\textbf{Desert}", "\\textbf{Punish}","\\textbf{Medals}")
    # Break down by ranks
    if(ranks=="ALL"){
        X_[,outcome_OK2 := 1-outcome_KWIA-outcome_MIAPOWDDTPUN]
        X_[,outcome_FF := outcome_KWIA-outcome_MIAPOWDDTPUN]
        X_[,eval(yvarz):=.(outcome_KIA*100,outcome_WIA*100,outcome_MIA*100,outcome_POW*100,outcome_DESERT*100,outcome_PUNISHED*100,medals_new*100)]
    }
    if(ranks=="O"){
        X_[,outcome_OK2 := 1-outcome_KWIA_O-outcome_MIAPOWDDTPUN_O]
        X_[,outcome_FF := outcome_KWIA_O-outcome_MIAPOWDDTPUN_O]
        X_[,eval(yvarz):=.(outcome_KIA_O*100,outcome_WIA_O*100,outcome_MIA_O*100,outcome_POW_O*100,outcome_DESERT_O*100,outcome_PUNISHED_O*100,medals_new_O*100)]
    }
    if(ranks=="E"){
        X_ <- X_ %>% .[,N_O:=outcome_KIA_O_SUM/outcome_KIA_O] %>% .[,N_E := N_SOLDIERS-N_O] %>% .[,outcome_KIA_E_SUM := outcome_KIA_SUM-outcome_KIA_O_SUM] %>% .[,outcome_KIA_E := outcome_KIA_E_SUM/N_E] %>% .[,outcome_WIA_E_SUM := outcome_WIA_SUM-outcome_WIA_O_SUM] %>% .[,outcome_WIA_E := outcome_WIA_E_SUM/N_E] %>% .[,outcome_MIA_E_SUM := outcome_MIA_SUM-outcome_MIA_O_SUM] %>% .[,outcome_MIA_E := outcome_MIA_E_SUM/N_E] %>% .[,outcome_POW_E_SUM := outcome_POW_SUM-outcome_POW_O_SUM] %>% .[,outcome_POW_E := outcome_POW_E_SUM/N_E] %>% .[,outcome_DESERT_E_SUM := outcome_DESERT_SUM-outcome_DESERT_O_SUM] %>% .[,outcome_DESERT_E := outcome_DESERT_E_SUM/N_E] %>% .[,outcome_PUNISHED_E_SUM := outcome_PUNISHED_SUM-outcome_PUNISHED_O_SUM] %>% .[,outcome_PUNISHED_E := outcome_PUNISHED_E_SUM/N_E] %>% .[,medals_new_E_SUM := medals_new_SUM-medals_new_O_SUM] %>% .[,medals_new_E := medals_new_E_SUM/N_E] %>% .[,outcome_KWIA_E_SUM := outcome_KWIA_SUM-outcome_KWIA_O_SUM] %>% .[,outcome_KWIA_E := outcome_KWIA_E_SUM/N_E] %>% .[,outcome_MIAPOWDDTPUN_E_SUM := outcome_MIAPOWDDTPUN_SUM-outcome_MIAPOWDDTPUN_O_SUM] %>% .[,outcome_MIAPOWDDTPUN_E := outcome_MIAPOWDDTPUN_E_SUM/N_E]
        X_[,outcome_OK2 := 1-outcome_KWIA_E-outcome_MIAPOWDDTPUN_E]
        X_[,outcome_FF := outcome_KWIA_E-outcome_MIAPOWDDTPUN_E]
        X_[,eval(yvarz):=.(outcome_KIA_E*100,outcome_WIA_E*100,outcome_MIA_E*100,outcome_POW_E*100,outcome_DESERT_E*100,outcome_PUNISHED_E*100,medals_new_E*100)]
    }
    # New variables
    X_[,RUS_100 := russian_jw*100]
    X_[,NKVD_OO_ := NKVD_OO > mean(NKVD_OO),by = UNIT_UA]
    X_[,POP_DENSITY := C1926_TOTAL_POP/(25^2)]
    X_[,AGE_1941 := 1941-dob]
    X_[,YRMO_date := as.Date(paste0(as.character(YRMO), '01'), format='%Y%m%d')]
    X_[,TID := YRMO_date %>% as.factor() %>% as.numeric()]
    # Echelons
    X_[,ECH_BAT := 1*grepl("б\\b",UNIT_UA)]
    X_[,ECH_BDE := 1*grepl("бр\\b",UNIT_UA)*(1-ECH_BAT)]
    X_[,ECH_RGT := 1*grepl("п\\b",UNIT_UA)*(1-ECH_BAT)*(1-ECH_BDE)]
    X_[,ECH_DIV := 1*grepl("д\\b",UNIT_UA)*(1-ECH_BAT)*(1-ECH_BDE)*(1-ECH_RGT)]
    X_[,ECH_CPS := 1*grepl("к\\b",UNIT_UA)*(1-ECH_BAT)*(1-ECH_BDE)*(1-ECH_RGT)*(1-ECH_DIV)]

    # Remove duplicates
    X_ <- X_ %>% .[!duplicated(paste0(UNIT_UA,"_",YRMO))]

    return(X_)
}

# NA swap
naswapz <- function(data,varz,na_val=-99){
    if(!"data.table"%in%class(data)){data <- as.data.table(data)}
    for(j in varz){data.table::set(data,which(is.na(data[[j]])),j,na_val)}
    return(data)
}

#############################
# Data analysis functions
#############################

# ICC
icc_fun <- function(mod){icc_ <- nlme::VarCorr(mod) %>% data.table::as.data.table() %>%  .[,icc:=vcov/sum(vcov)] %>% .[,.(grp, vcov, icc)]; return(icc_)}

# Interaction term standard errors
ix_se <- function(vcv,x0,x1){
    sqrt(vcv[grep(x0,rownames(vcv)),grep(x0,rownames(vcv))]+vcv[grep(x1,rownames(vcv)),grep(x1,rownames(vcv))]+2*vcv[grep(x0,rownames(vcv)),grep(x1,rownames(vcv))])
}

# Hausman test for felm
phtest_felm <- function(glmerMod, felmMod, ...){  ## changed function call
    coef.wi <- coef(felmMod)
    coef.re <- fixef(glmerMod)  ## changed coef() to fixef() for glmer
    vcov.wi <- vcov(felmMod)
    vcov.re <- vcov(glmerMod)
    names.wi <- names(coef.wi)
    names.re <- names(coef.re)
    coef.h <- names.re[names.re %in% names.wi]
    dbeta <- coef.wi[coef.h] - coef.re[coef.h]
    df <- length(dbeta)
    dvcov <- vcov.re[coef.h, coef.h] - vcov.wi[coef.h, coef.h]
    stat <- abs(t(dbeta) %*% as.matrix(solve(dvcov)) %*% dbeta)  ## added as.matrix()
    pval <- pchisq(stat, df = df, lower.tail = FALSE)
    names(stat) <- "chisq"
    parameter <- df
    names(parameter) <- "df"
    alternative <- "one model is inconsistent"
    res <- list(statistic = stat, p.value = pval, parameter = parameter, 
        method = "Hausman Test",  alternative = alternative,
                data.name=deparse(getCall(glmerMod)$data))  ## changed
    class(res) <- "htest"
    return(res)
}

# Time lags
tlagz <- function(data,unit_id,time_id,t_lags=3,lagvarz,ncores=1,drop_dups=TRUE){
    if(!"data.table"%in%class(data)){data <- as.data.table(data)}
    data <- data[order(get(unit_id), get(time_id)),] 
    if(drop_dups){
        data <- data[!(X_[,.(get(unit_id), get(time_id)) ] %>% duplicated()),]
    }
    out <- mclapply(unique(data[,get(unit_id)]),function(i0){#print(i0)
        X1 <- data[get(unit_id)==i0,]
        tmat <- lapply(1:t_lags,function(t_){
            if(nrow(X1)<=t_){X2 <- X1[,.SD,.SDcols=lagvarz] %>% .[1:min(t_,.N),eval(lagvarz):=NA] %>% data.table::setnames(paste0(lagvarz,"_t",t_))}
            if(nrow(X1)>t_){
                X2 <- X1 %>% .[c(1:t_,1:(.N-t_)),.SD,.SDcols=lagvarz] %>% .[1:t_,eval(lagvarz):=NA] %>% data.table::setnames(paste0(lagvarz,"_t",t_))       
                }
            X2
            }) %>% dplyr::bind_cols()
        bind_cols(X1,tmat)
    },mc.cores=ncores) %>% dplyr::bind_rows()
    return(out)
}

# Match balance table
balancetabz <- function(YTX,m_pairs,full_data,varz_id,treatvar,balvarz,ballabz=balvarz,naswap=FALSE,latex=FALSE,filz=NULL,rnd=3,nboot=100,tests="ks"){
    if(!"data.table"%in%class(full_data)){full_data <- as.data.table(full_data)}
    # Add missing columns
    dataz_pre <- merge(YTX,full_data[,.SD,.SDcols=c(varz_id,balvarz[!balvarz%in%names(YTX)])],by=varz_id,all.x=T,all.y=F)
    dataz_post <- merge(YTX[c(m_pairs$index.treated,m_pairs$index.control)],full_data[,.SD,.SDcols=c(varz_id,balvarz[!balvarz%in%names(YTX)])],by=varz_id,all.x=T,all.y=F)
    # Recode to match on NA's
    if(naswap==TRUE){dataz_pre <- naswapz(dataz_pre,varz=balvarz);dataz_post <- naswapz(dataz_post,varz=balvarz)}
    # Calculate balance stats
    suppressWarnings({
        t.pre <- dataz_pre[,lapply(.SD,function(x){mean(x[get(treatvar)==1],na.rm=T)}),.SDcols=balvarz]
        c.pre <- dataz_pre[,lapply(.SD,function(x){mean(x[get(treatvar)==0],na.rm=T)}),.SDcols=balvarz]
        t.post <- dataz_post[,lapply(.SD,function(x){mean(x[get(treatvar)==1],na.rm=T)}),.SDcols=balvarz]
        c.post <- dataz_post[,lapply(.SD,function(x){mean(x[get(treatvar)==0],na.rm=T)}),.SDcols=balvarz]
        sdd.pre <- dataz_pre[,lapply(.SD,function(x){(mean(x[get(treatvar)==1],na.rm=T)-mean(x[get(treatvar)==0],na.rm=T))/sd(x[get(treatvar)==1],na.rm=T)}),.SDcols=balvarz]
        sdd.post <- dataz_post[,lapply(.SD,function(x){(mean(x[get(treatvar)==1],na.rm=T)-mean(x[get(treatvar)==0],na.rm=T))/sd(x[get(treatvar)==1],na.rm=T)}),.SDcols=balvarz]
        tt.pre <- dataz_pre[,lapply(.SD,function(x){
            t.test(x[get(treatvar)==1],x[get(treatvar)==0])$statistic  %>% round(rnd) %>% paste0(t.test(x[get(treatvar)==1],x[get(treatvar)==0])$p.value  %>% findInterval(.,c(0,.001,.01,.05,.1,1)) %>% c("**","**","*","'","","")[.])
            }),.SDcols=balvarz]
        tt.post <- dataz_post[,lapply(.SD,function(x){
            t.test(x[get(treatvar)==1],x[get(treatvar)==0])$statistic  %>% round(rnd) %>% paste0(t.test(x[get(treatvar)==1],x[get(treatvar)==0])$p.value  %>% findInterval(.,c(0,.001,.01,.05,.1,1)) %>% c("**","**","*","'","","")[.])
            }),.SDcols=balvarz]
        ks.pre <- dataz_pre[,lapply(.SD,function(x){
            ks.test(x[get(treatvar)==1],x[get(treatvar)==0])$statistic  %>% round(rnd) %>% paste0(ks.test(x[get(treatvar)==1],x[get(treatvar)==0])$p.value  %>% findInterval(.,c(0,.001,.01,.05,.1,1)) %>% c("**","**","*","'","","")[.])
            }),.SDcols=balvarz]
        ks.post <- dataz_post[,lapply(.SD,function(x){
            ks.test(x[get(treatvar)==1],x[get(treatvar)==0])$statistic  %>% round(rnd) %>% paste0(ks.test(x[get(treatvar)==1],x[get(treatvar)==0])$p.value  %>% findInterval(.,c(0,.001,.01,.05,.1,1)) %>% c("**","**","*","'","","")[.])
            }),.SDcols=balvarz]
    })

    # Extract stats
    match_bal <- lapply(1:(length(balvarz)),function(i0){#print(i0)
        data.table(
            X=c(ballabz[i0],""),
            match=c("pre","post"),
            mean_T=c(t.pre %>% unlist() %>% .[i0] %>% smart_round(rnd),
                t.post %>% unlist() %>% .[i0] %>% smart_round(rnd)),
            mean_C=c(c.pre %>% unlist() %>% .[i0] %>% smart_round(rnd),
                c.post %>% unlist() %>% .[i0] %>% smart_round(rnd)),
            std_diff=c(sdd.pre %>% unlist() %>% .[i0] %>% smart_round(rnd),
                sdd.post %>% unlist() %>% .[i0] %>% smart_round(rnd)),
            ks=c(ks.pre %>% unlist() %>% .[i0] ,
                ks.post %>% unlist() %>% .[i0] ),
            t=c(tt.pre %>% unlist() %>% .[i0] ,
                tt.post %>% unlist() %>% .[i0] )
        )
    }) %>% dplyr::bind_rows()
    match_bal %>% data.table::setnames(c("Covariate","Status","Mean T","Mean C","Std. Diff.","KS Statistic","T Statistic"))
    if(tests=="ks"){
        match_bal <- match_bal[,.SD,.SDcols=c("Covariate","Status","Mean T","Mean C","Std. Diff.","KS Statistic")]
    }
    if(tests=="t"){
        match_bal <- match_bal[,.SD,.SDcols=c("Covariate","Status","Mean T","Mean C","Std. Diff.","T Statistic")]
    }

    # Return table
    if(latex==FALSE){
        return(match_bal)
    }

    # Return LaTeX
    if(latex==TRUE&length(tests)==1){
        if(length(filz)==0){filz <- tempfile()}
        xmat <- xtable(match_bal,caption = "{\\bf Covariate Balance Statistics, Pre- and Post-Matching.}",label="tab:match_bal",align=paste0("lp{2.5cm}p{3.5cm}p{1.75cm}p{1.75cm}p{1.75cm}p{1.75cm}")) %>% data.table::setnames(paste0("\\multicolumn{1}{c}{",names(.),"}")) 
        for(r0 in c(1:nrow(xmat))){xmat[r0,] <- paste0("\\multicolumn{1}{c}{",xmat[r0,],"}")}
        xmat <- xmat %>%  print(.,hline.after=NULL,add.to.row=list(pos=list(-1,0, nrow(match_bal)),command=c("\\toprule ","\\midrule ", "\\bottomrule ")),sanitize.colnames.function = identity,sanitize.text.function=identity,caption.placement="top",size="\\small",include.rownames=FALSE)
        note <- paste("Standardized difference (Std. Diff.) defined as $\\frac{\\mbox{mean}(T)-\\mbox{mean}(C)}{\\mbox{sd}(T)}$. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.\n}", sep = "")
        cat(gsub("\\end{tabular}\n", paste("\\end{tabular}\n\\caption*{{\\small ", note, "}\n", sep = ""), xmat, fixed = TRUE), sep = "\n", file =  filz)
        return(xmat)
    }
    if(latex==TRUE&length(tests)==2){
        if(length(filz)==0){filz <- tempfile()}
        xmat <- xtable(match_bal,caption = "{\\bf Covariate Balance Statistics, Pre- and Post-Matching.}",label="tab:match_bal",align=paste0("lp{2.5cm}p{3.5cm}p{1.75cm}p{1.75cm}p{1.75cm}p{1.75cm}p{1.75cm}")) %>% data.table::setnames(paste0("\\multicolumn{1}{c}{",names(.),"}")) 
        for(r0 in c(1:nrow(xmat))){xmat[r0,] <- paste0("\\multicolumn{1}{c}{",xmat[r0,],"}")}
        xmat <- xmat %>%  print(.,hline.after=NULL,add.to.row=list(pos=list(-1,0, nrow(match_bal)),command=c("\\toprule ","\\midrule ", "\\bottomrule ")),sanitize.colnames.function = identity,sanitize.text.function=identity,caption.placement="top",size="\\small",include.rownames=FALSE)
        note <- paste("Standardized difference (Std. Diff.) defined as $\\frac{\\mbox{mean}(T)-\\mbox{mean}(C)}{\\mbox{sd}(T)}$. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.\n}", sep = "")
        cat(gsub("\\end{tabular}\n", paste("\\end{tabular}\n\\caption*{{\\small ", note, "}\n", sep = ""), xmat, fixed = TRUE), sep = "\n", file =  filz)
        return(xmat)
    }
}


# Extract matched pairs
extract_pairz <- function(YTX,dataz,m_pairs,idvarz,commonvarz,allvarz,alllabz,pair_listz=FALSE){
    varz <- c(allvarz %>% .[.%in%names(dataz)])
    x_matched <- bind_cols(
        YTX[m_pairs$index.treated,.SD,.SDcols=c(idvarz)] %>% data.table::setnames(idvarz,paste0(idvarz,"_t")),
        YTX[m_pairs$index.control,.SD,.SDcols=c(idvarz,commonvarz)] %>% data.table::setnames(idvarz,paste0(idvarz,"_c")),
        YTX[m_pairs$index.treated,apply(.SD,1,function(x){paste0(x,collapse=" ")}),.SDcols=c(idvarz,commonvarz)] %>% match(dataz[,apply(.SD,1,function(x){paste0(x,collapse=" ")}),.SDcols=c(idvarz,commonvarz)]) %>% dataz[.,.SD,.SDcols=varz] %>% data.table::setnames(varz,paste0(varz,"_t")),
        YTX[m_pairs$index.control,apply(.SD,1,function(x){paste0(x,collapse=" ")}),.SDcols=c(idvarz,commonvarz)] %>% match(dataz[,apply(.SD,1,function(x){paste0(x,collapse=" ")}),.SDcols=c(idvarz,commonvarz)]) %>% dataz[.,.SD,.SDcols=varz] %>% data.table::setnames(varz,paste0(varz,"_c"))
        ) %>%
        .[,eval(paste0(c(yvarz,tvarz_),"_diff")) := lapply(c(yvarz,tvarz_),function(x){get(paste0(x,"_t"))-get(paste0(x,"_c"))})] %>% 
        .[,paste0(tvarz_,"_ratio") := lapply(tvarz_,function(x){get(paste0(x,"_t"))/get(paste0(x,"_c"))})] %>% 
        .[,TOP10 := 1*(get(paste0(tvarz_[1],"_diff")) %in% (get(paste0(tvarz_[1],"_diff")) %>% sort() %>% rev() %>% .[1:10]))] %>% 
        .[,TOP10_OFF := 1*(OFFENSE==1&get(paste0(tvarz_[1],"_diff")) %in% (get(paste0(tvarz_[1],"_diff"))[OFFENSE==1] %>% sort() %>% rev() %>% .[1:10]))] %>%
        .[,TOP10_DEF := 1*(DEFENSE==1&get(paste0(tvarz_[1],"_diff")) %in% (get(paste0(tvarz_[1],"_diff"))[DEFENSE==1] %>% sort() %>% rev() %>% .[1:10]))]
    if(pair_listz==TRUE){
        pair_list <- lapply(1:nrow(x_matched),function(p0){
            list(
                battle_info=x_matched[p0,.SD,.SDcols=c(commonvarz,paste0(c(yvarz,tvarz_),"_diff"),"TOP10","TOP10_OFF","TOP10_DEF")] %>% t() %>% as.data.table(keep.rownames=TRUE) %>% data.table::setnames(c("rn","Info")),
                units_info=bind_rows(
                        x_matched[p0,] %>% .[,.SD,.SDcols=grep("_t$",names(.),value=T)] %>% data.table::setnames(gsub("_t$","",names(.))),
                        x_matched[p0,] %>% .[,.SD,.SDcols=grep("_c$",names(.),value=T)] %>% data.table::setnames(gsub("_c$","",names(.)))
                    ) %>% t() %>% as.data.table(keep.rownames=TRUE) %>% .[,Covariate := rn %>% match(c(idvarz,allvarz)) %>% c(idvarz,alllabz)[.]] %>% data.table::setnames(c("V1","V2"),c("Treated","Control")) %>% .[,.(Covariate,Treated,Control,rn)]
            )
        })
        return(pair_list)
    } else {return(x_matched)}
}

# Model ensemble (3xc)
ensemble_modz <- function(X_s,yvarz,model_re,model_fe,loo_type=""){
out <- parallel::mclapply(seq_along(yvarz),function(y0){#print(y0)
    tryCatch({

    if(sum(grepl("y_loo",model_fe))>0){
        X_s <- X_s %>% .[,y_loo :=get(gsub("_100","_LOO",yvarz[y0])%>%paste0(loo_type))]
    }

    # Unweighted
    suppressWarnings({
        suppressMessages({
            mod_xc <- lme4::lmer(model_re, data=X_s[,c("y","weights"):=.(get(yvarz[y0]),1)],weights=X_s$weights, control = lme4::lmerControl(optimizer ="Nelder_Mead"))  
        })
    })
    betas <- summary(mod_xc)["coefficients"][[1]] %>% .[grep("NKVD_",row.names(.)),]
    other_betas <- summary(mod_xc)["coefficients"][[1]] %>% .[!grepl("NKVD_",row.names(.)),]
    row.names(other_betas) <- row.names(other_betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.)
    other_betas <- lapply(1:nrow(other_betas),function(i0){
        out <- data.table::data.table(b=other_betas[i0,1],l=other_betas[i0,1]-other_betas[i0,2]*1.96,u=other_betas[i0,1]+other_betas[i0,2]*1.96)%>%data.table::setnames(paste0(names(.),"_",row.names(other_betas)[i0]))
    })%>% dplyr::bind_cols()
    iccs <- icc_fun(mod_xc) %>% .[,.(grp,icc)] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(.[1,] %>% as.character() %>% paste0("icc_",.)) %>% .[-1]
    fitz <- summary(mod_xc)$optinfo$conv$lme4$messages %>% paste0(collapse="; ") %>% (function(.){ifelse(nchar(.)>0,1,0)})
    grp_xc <- attributes(mod_xc) %>% .["flist"] %>% .[[1]] %>% sapply(function(x){length(levels(x))})%>%.[order(names(.))]%>%t()%>%data.table::as.data.table()%>%data.table::setnames(paste0("N_",names(.)))
    m1 <- data.table::data.table(outcome_name=yvarz[y0],model_name="RE unweighted",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, reml = summary(mod_xc)$AICtab,msg=fitz,aic=stats::AIC(mod_xc),bic=stats::BIC(mod_xc)) %>% dplyr::bind_cols(iccs,other_betas,grp_xc)%>%dplyr::select(names(.)[1:7],names(other_betas),dplyr::everything())
    # FE unweighted
    suppressWarnings({
        suppressMessages({
            mod_fe <- lfe::felm(model_fe, data = X_s[,c("Y","weights"):=.(get(yvarz[y0]),1)], weights = X_s$weights, exactDOF=T); summary(mod_fe)
        })
    })
    grp_fe <- mod_fe$fe %>% sapply(function(x){length(levels(x))})%>%.[order(names(.))]%>%t()%>%data.table::as.data.table()%>%data.table::setnames(paste0("N_",names(.)))
    ht <- phtest_felm(mod_xc,mod_fe)
    betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
    other_betas <- summary(mod_fe)["coefficients"][[1]] %>% .[!grepl("NKVD",row.names(.)),]
    row.names(other_betas) <- row.names(other_betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.)
    other_betas <- lapply(1:nrow(other_betas),function(i0){
        out <- data.table::data.table(b=other_betas[i0,1],l=other_betas[i0,1]-other_betas[i0,2]*1.96,u=other_betas[i0,1]+other_betas[i0,2]*1.96)%>%data.table::setnames(paste0(names(.),"_",row.names(other_betas)[i0]))
    })%>% dplyr::bind_cols()
    m4 <- data.table::data.table(outcome_name=yvarz[y0],model_name="FE unweighted",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68,aic=stats::AIC(mod_fe),bic=stats::BIC(mod_fe)) %>% dplyr::bind_cols(other_betas,grp_fe)%>%dplyr::select(names(.)[1:7],names(other_betas),dplyr::everything())
    m4[,hausman_p := ht$p.value]%>%.[,n:=mod_fe["N"][[1]]]
    m1[,hausman_p := ht$p.value]%>%.[,n:=length(summary(mod_xc)$residuals)]

    # Pop weights
    suppressWarnings({
        suppressMessages({
            mod_xc <- lme4::lmer(model_re, data=X_s[,c("y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)],weights=X_s$weights, control = lme4::lmerControl(optimizer ="Nelder_Mead"))
        })
    })
    grp_xc <- attributes(mod_xc) %>% .["flist"] %>% .[[1]] %>% sapply(function(x){length(levels(x))})%>%.[order(names(.))]%>%t()%>%data.table::as.data.table()%>%data.table::setnames(paste0("N_",names(.)))
    betas <- summary(mod_xc)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
    other_betas <- summary(mod_xc)["coefficients"][[1]] %>% .[!grepl("NKVD",row.names(.)),]
    row.names(other_betas) <- row.names(other_betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.)
    other_betas <- lapply(1:nrow(other_betas),function(i0){
        out <- data.table::data.table(b=other_betas[i0,1],l=other_betas[i0,1]-other_betas[i0,2]*1.96,u=other_betas[i0,1]+other_betas[i0,2]*1.96)%>%data.table::setnames(paste0(names(.),"_",row.names(other_betas)[i0]))
    })%>% dplyr::bind_cols()
    iccs <- icc_fun(mod_xc) %>% .[,.(grp,icc)] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(.[1,] %>% as.character() %>% paste0("icc_",.)) %>% .[-1]
    fitz <- summary(mod_xc)$optinfo$conv$lme4$messages %>% paste0(collapse="; ") %>% (function(.){ifelse(nchar(.)>0,1,0)})
    m3 <- data.table::data.table(outcome_name=yvarz[y0],model_name="RE pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, reml = summary(mod_xc)$AICtab,msg=fitz,aic=stats::AIC(mod_xc),bic=stats::BIC(mod_xc)) %>% dplyr::bind_cols(iccs,other_betas,grp_xc)%>%dplyr::select(names(.)[1:7],names(other_betas),dplyr::everything())
    # FE pop
    suppressWarnings({
        suppressMessages({
            mod_fe <- lfe::felm(model_fe, data = X_s[,c("Y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)], weights = X_s$weights, exactDOF=T); summary(mod_fe)
        })
    })
    grp_fe <- mod_fe$fe %>% sapply(function(x){length(levels(x))})%>%.[order(names(.))]%>%t()%>%data.table::as.data.table()%>%data.table::setnames(paste0("N_",names(.)))
    betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
    other_betas <- summary(mod_fe)["coefficients"][[1]] %>% .[!grepl("NKVD",row.names(.)),]
    row.names(other_betas) <- row.names(other_betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.)
    other_betas <- lapply(1:nrow(other_betas),function(i0){
        out <- data.table::data.table(b=other_betas[i0,1],l=other_betas[i0,1]-other_betas[i0,2]*1.96,u=other_betas[i0,1]+other_betas[i0,2]*1.96)%>%data.table::setnames(paste0(names(.),"_",row.names(other_betas)[i0]))
    })%>% dplyr::bind_cols()
    m6 <- data.table(outcome_name=yvarz[y0],model_name="FE pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68,aic=stats::AIC(mod_fe),bic=stats::BIC(mod_fe)) %>% dplyr::bind_cols(other_betas,grp_fe)%>%dplyr::select(names(.)[1:7],names(other_betas),dplyr::everything())
    ht <- phtest_felm(mod_xc,mod_fe)
    m6[,hausman_p := ht$p.value]%>%.[,n:=mod_fe["N"][[1]]]
    m3[,hausman_p := ht$p.value]%>%.[,n:=length(summary(mod_xc)$residuals)]
    data.table::rbindlist(list(m1,m3,m4,m6),fill=T)
    },error=function(e){print(paste0(y0,"; ",e));NULL})
},mc.cores=min(length(yvarz),parallel::detectCores()/4)) %>% .[lengths(.) != 0] %>% data.table::rbindlist(fill=TRUE)
return(out)
}


#############################
# Table and graphics functions
#############################

# Simplified coefficient extraction (for fixest::feols only)
betamat <- function(mod,regx,model_name=NULL){
    if(is.null(model_name)){model_name <- "Model"}
    out <- lapply(1:length(mod),function(m0){
        grp_fe <- mod[[m0]][["fixef_sizes"]] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(paste0(names(.),"_fe"))
        betas <- summary(mod[[m0]],robust=TRUE)[["coeftable"]] %>% .[grep(regx,row.names(.)),]
        if(!"matrix"%in%class(betas)){
            betas<-betas%>%t()%>%as.data.frame()
            row.names(betas) <- summary(mod[[m0]],robust=TRUE)[["coeftable"]] %>% row.names(.) %>% grep(regx,.,value=TRUE)
        }
        data.table(model_name=model_name,outcome_name=as.character(mod[[m0]][["fml"]])[2],x=row.names(betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.),b=betas[,1],l=betas[,1]-betas[,2]*1.96,u=betas[,1]+betas[,2]*1.96, l90=betas[,1]-betas[,2]*1.68,u90=betas[,1]+betas[,2]*1.68,r2=1-mod[[m0]][["ssr"]]/mod[[m0]][["ssr_null"]],aic=stats::AIC(mod[[m0]]),bic=stats::BIC(mod[[m0]]),n=mod[[m0]][["nobs"]]) %>% dplyr::bind_cols(grp_fe)
    })%>%data.table::rbindlist()
    return(out)
}


#############################
# Miscellaneous functions
#############################

# Web search
ddg.search <- function(search){browseURL(paste("http://www.duckduckgo.com/?q=", search, sep = ""))}
